# -*- coding: utf-8 -*-
import scrapy
from douban.items import DoubanItem

class DoubanSpiderSpider(scrapy.Spider):
    """Spider for the Douban movie Top 250 listing page.

    For every movie entry found on a listing page, the rank and the primary
    title are extracted and emitted as a ``DoubanItem``.
    """

    # Name under which Scrapy identifies this spider.
    name = 'douban_spider'
    # Domains the spider is allowed to crawl.
    allowed_domains = ['movie.douban.com']
    # Entry URL(s) handed to the scheduler when the crawl starts.
    start_urls = ['https://movie.douban.com/top250']

    def parse(self, response):
        """Parse a listing page and yield one ``DoubanItem`` per movie entry.

        Args:
            response: The downloaded listing page.

        Yields:
            DoubanItem: Item with ``serial_number`` and ``movie_name`` set.
            Either value is ``None`` when its XPath matches nothing, because
            ``extract_first()`` returns ``None`` for an empty selection.
        """
        movie_list = response.xpath("//div[@ class='article']//ol[@ class='grid_view']//li")
        for i_item in movie_list:
            douban_item = DoubanItem()
            douban_item['serial_number'] = i_item.xpath(".//div[@ class='item']//div[@ class='pic']//em/text()").extract_first()
            douban_item['movie_name'] = i_item.xpath(".//div[@ class='item']//div[@ class='info']//div[@ class='hd']//a//span[1]/text()").extract_first()
            # Text nodes of the first <p> inside the entry's 'bd' block.
            # NOTE(review): these are only logged, not stored, because the
            # matching DoubanItem field (if any) is not visible from this file.
            contents = i_item.xpath(".//div[@ class='item']//div[@ class='info']//div[@ class='bd']//p[1]/text()").extract()
            for i_content in contents:
                # Remove every whitespace character (spaces, newlines, NBSP).
                content = "".join(i_content.split())
                self.logger.debug("%s _", content)
            # Hand the populated item to the engine; without this the item
            # would be discarded and the spider would produce no output.
            yield douban_item

